# Install EBImage from Bioconductor if it is not already installed.
# require() returns FALSE (rather than erroring) when the package is
# missing, so the body runs only on first use.
# NOTE(review): biocLite() was deprecated in favour of BiocManager as of
# R 3.5 / Bioconductor 3.8 -- confirm target R version before upgrading.
if(!require("EBImage")){
source("https://bioconductor.org/biocLite.R")
biocLite("EBImage")
}
## Loading required package: EBImage
## Warning: package 'EBImage' was built under R version 3.4.3
# Install gbm from CRAN if not present.
if(!require("gbm")){
install.packages("gbm")
}
## Loading required package: gbm
## Warning: package 'gbm' was built under R version 3.4.4
## Loaded gbm 2.1.4
# Attach the packages (no-ops if the require() calls above already attached them).
library("EBImage")
library("gbm")
# Install xgboost from CRAN if not present.
if(!require("xgboost")){
install.packages("xgboost")
}
## Loading required package: xgboost
## Warning: package 'xgboost' was built under R version 3.4.4
# Install plotly (used later for the 3-D CV-error surface) if not present.
if(!require("plotly")){
install.packages("plotly")
}
## Loading required package: plotly
## Warning: package 'plotly' was built under R version 3.4.4
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.4.4
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:xgboost':
##
## slice
## The following object is masked from 'package:EBImage':
##
## toRGB
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library("EBImage")
library("gbm")
library("xgboost")
library("plotly")
Set the working directory to the image folder. In order to obtain reproducible results, call set.seed() whenever randomization is used.
# Fix the RNG seed so CV fold assignment and stochastic model fits are reproducible.
set.seed(2018)
# NOTE(review): hard-coded absolute path; setwd() in scripts is fragile --
# consider running from the project root and using relative paths instead.
setwd("C:/43/paraalel/Fall2018-Proj3-Sec1-grp6")
Provide directories for training images. Low-resolution (LR) image set and High-resolution (HR) image set will be in different subfolders.
# Root of the training data; the LR/HR image sets live in subfolders below it.
train_dir <- "../data/train_set/" # This will be modified for different data sets.
train_LR_dir <- paste0(train_dir, "LR/")            # low-resolution inputs
train_HR_dir <- paste0(train_dir, "HR/")            # high-resolution targets
train_label_path <- paste0(train_dir, "label.csv")  # image labels
In this chunk, we have a set of controls for the evaluation experiments.
# Experiment switches controlling which pipeline stages are executed.
run.cv <- TRUE            # run cross-validation on the training set
K <- 3                    # number of CV folds
run.feature.train <- TRUE # process features for training set
run.test <- TRUE          # run evaluation on an independent test set
run.feature.test <- TRUE  # process features for test set
Using cross-validation or independent test set evaluation, we compare the performance of models with different specifications. In the baseline, we use GBM with different depths and numbers of trees. In the advanced model, we use XGBoost with different max_depth, eta, subsample, and min_child_weight. In the following chunk, we list the depth and number-of-trees values for the GBM models we will compare, as well as the max_depth, eta, subsample, and min_child_weight values for the XGBoost models we will compare.
# Full factorial GBM grid: 2 depths x 2 tree counts = 4 candidate models.
model_values <- expand.grid(depth = c(3,13), n.trees = c(150,300))
# Full factorial XGBoost grid: 3 x 2 x 2 x 3 = 36 candidate models.
model_values_xgb <- expand.grid(max_depth = c(4,6,8), eta = c(0.3,0.5), subsample = c(0.5,0.8), min_child_weight = c(4,6,8))
feature.R wraps the feature engineering functions and options.
#setwd("~/Documents/GitHub/Fall2018-Proj3-Sec1-grp6")
# Load the feature-extraction routine (defines feature()).
source("../lib/feature.R")
# Time feature construction on the training set. feature() appears to
# return a list with $feature (predictors) and $label (responses) --
# TODO confirm against ../lib/feature.R.
tm_feature_train <- NA
if(run.feature.train){
tm_feature_train <- system.time(dat_train <- feature(train_LR_dir, train_HR_dir))
feat_train <- dat_train$feature
label_train <- dat_train$label
}
Call the train model and test model from library.
Model1: GBM
# Parallel backend used by the GBM training / CV helpers.
library("doParallel")
## Warning: package 'doParallel' was built under R version 3.4.4
## Loading required package: foreach
## Warning: package 'foreach' was built under R version 3.4.4
## Loading required package: iterators
## Warning: package 'iterators' was built under R version 3.4.4
## Loading required package: parallel
library("foreach")
# Project helpers: parallel GBM training, prediction, XGBoost training,
# and the GBM cross-validation driver.
source("../lib/train.gbmpar.R")
source("../lib/test.R")
source("../lib/train.xgboost.R")
source("../lib/cross_validation_gbmpar.R")
# Cross-validate every GBM hyper-parameter combination. Row k of the result
# holds two numbers per grid row; column 1 is used below as the mean CV
# error, column 2 presumably its spread -- confirm against
# cross_validation_gbmpar.R.
if(run.cv){
err_cv.gbmpar <- array(dim=c(nrow(model_values), 2))
for(k in 1:nrow(model_values)) {
cat("k=", k, "\n")
err_cv.gbmpar[k,] <- cv.function.gbmpar(feat_train, label_train, model_values[k, ], K)
}
save(err_cv.gbmpar, file="../output/err_cv.gbmpar.RData")
}
## k= 1
## k= 2
## k= 3
## k= 4
# Display the GBM CV error matrix.
err_cv.gbmpar
## [,1] [,2]
## [1,] 0.003778984 1.959322e-04
## [2,] 0.003714726 9.298758e-04
## [3,] 0.003630574 1.800356e-04
## [4,] 0.003515067 6.315562e-05
Visualize cross-validation results.
# Timestamp for the knitted report.
print(Sys.time())
## [1] "2018-11-07 01:18:15 EST"
# Plot mean CV error against each (depth, n.trees) combination.
if(run.cv){
load("../output/err_cv.gbmpar.RData")
mv <- factor(paste("(", model_values$depth,",", model_values$n.trees, ")"))
plot(err_cv.gbmpar[,1]~mv, xlab="(Interaction Depth, Number of trees)", ylab="CV Error",
main="Cross Validation Error", type="n", ylim=c(0, 0.005))
points(err_cv.gbmpar[,1]~mv, col="blue", pch=16)
lines(err_cv.gbmpar[,1]~mv, col="blue")
#arrows(mv,err_cv[,1]-err_cv[,2], mv, err_cv[,1]+err_cv[,2],
#length=0.1, angle=90, code=3)
}
# Default to the first grid ROW when CV is skipped. (Fixed: the original
# model_values[1] selects the first COLUMN of the data frame, leaving
# model_best$n.trees NULL when run.cv is FALSE.)
model_best <- model_values[1, ]
if(run.cv){
model_best <- model_values[which.min(err_cv.gbmpar[,1]),]
}
# Best GBM hyper-parameters, packaged for train.gbmpar().
par_best <- list(depth=model_best$depth, n.trees=model_best$n.trees)
par_best
## $depth
## [1] 13
##
## $n.trees
## [1] 300
# PSNR (dB) of the best GBM model: 20*log10(MAX) - 10*log10(MSE) with
# MAX = 1, i.e. pixel intensities assumed on [0, 1] -- TODO confirm scale.
PSNR_gbm <- 20*log10(1) - 10 * log10(err_cv.gbmpar[which.min(err_cv.gbmpar[,1]),1])
PSNR_gbm
## [1] 24.54066
# Retrain GBM on the full training set with the best hyper-parameters and
# persist the fitted model list.
tm_train_gbm=NA
tm_train_gbm <- system.time(fit_train.gbmpar <- train.gbmpar(feat_train, label_train, par_best))
save(fit_train.gbmpar, file="../output/fit_train.gbmpar.RData")
Model 2: XGBoost
# XGBoost cross-validation driver (defines cv.function.xgb()).
source("../lib/cross_validation_xgb.R")
# Cross-validate all 36 XGBoost hyper-parameter combinations; same layout
# as the GBM result: one (error, spread) row per grid row.
if(run.cv){
err_cv.xgb <- array(dim=c(nrow(model_values_xgb), 2))
for(k in 1:nrow(model_values_xgb)){
cat("k=", k, "\n")
err_cv.xgb[k,] <- cv.function.xgb(X.train=feat_train, y.train=label_train, modelvalues=model_values_xgb[k, ], K=K)
}
save(err_cv.xgb, file="../output/err_cv.xgb.RData")
}
## k= 1
## k= 2
## k= 3
## k= 4
## k= 5
## k= 6
## k= 7
## k= 8
## k= 9
## k= 10
## k= 11
## k= 12
## k= 13
## k= 14
## k= 15
## k= 16
## k= 17
## k= 18
## k= 19
## k= 20
## k= 21
## k= 22
## k= 23
## k= 24
## k= 25
## k= 26
## k= 27
## k= 28
## k= 29
## k= 30
## k= 31
## k= 32
## k= 33
## k= 34
## k= 35
## k= 36
err_cv.xgb
## [,1] [,2]
## [1,] 0.003399785 2.812109e-04
## [2,] 0.003469907 3.944959e-04
## [3,] 0.003516291 3.885324e-04
## [4,] 0.003627413 5.304099e-04
## [5,] 0.003725011 4.045517e-04
## [6,] 0.003785855 1.514141e-04
## [7,] 0.003319649 2.433532e-04
## [8,] 0.003437392 3.878908e-04
## [9,] 0.003561283 3.808719e-04
## [10,] 0.003415979 2.872261e-04
## [11,] 0.003486052 3.253142e-04
## [12,] 0.003586115 2.960904e-04
## [13,] 0.003366679 2.750453e-04
## [14,] 0.003422762 1.666533e-04
## [15,] 0.003464277 3.965385e-04
## [16,] 0.003436438 3.243575e-04
## [17,] 0.003571086 2.011602e-04
## [18,] 0.003686769 2.675746e-04
## [19,] 0.003299866 2.640279e-04
## [20,] 0.003396954 1.075917e-04
## [21,] 0.003438341 2.121268e-04
## [22,] 0.003402661 2.170707e-04
## [23,] 0.003474652 1.810802e-04
## [24,] 0.003519362 3.724450e-04
## [25,] 0.003356108 1.053242e-04
## [26,] 0.003374840 3.315921e-04
## [27,] 0.003400675 1.931607e-04
## [28,] 0.003387110 2.945585e-04
## [29,] 0.003489634 3.269490e-04
## [30,] 0.003667965 3.602869e-04
## [31,] 0.003299594 2.641026e-04
## [32,] 0.003371867 3.724390e-04
## [33,] 0.003343184 2.429026e-04
## [34,] 0.003354856 2.465940e-04
## [35,] 0.003495532 8.593389e-05
## [36,] 0.003503381 2.418258e-04
Visualize cross-validation results.
plot_ly(x=model_values_xgb$max_depth, y=model_values_xgb$eta, z=err_cv.xgb, type="surface")
# Default to the first grid ROW when CV is skipped. (Fixed: the original
# model_values_xgb[1] selects the first COLUMN of the data frame, so the
# eta/subsample/min_child_weight entries would be NULL when run.cv is FALSE.)
model_best_xgb <- model_values_xgb[1, ]
if(run.cv){
model_best_xgb <- model_values_xgb[which.min(err_cv.xgb[,1]),]
}
# Best XGBoost hyper-parameters, packaged for train.xgboost().
par_best_xgb <- list(max_depth=model_best_xgb$max_depth, eta=model_best_xgb$eta, subsample = model_best_xgb$subsample, min_child_weight = model_best_xgb$min_child_weight)
#par_best_xgb <- list()
# PSNR (dB) of the best XGBoost model (pixel intensities assumed on [0, 1]).
PSNR_xgb <- 20*log10(1) - 10 * log10(err_cv.xgb[which.min(err_cv.xgb[,1]),1])
PSNR_xgb
## [1] 24.81539
# Retrain XGBoost on the full training set with the best hyper-parameters
# and persist the fitted model list.
tm_train_xgb=NA
tm_train_xgb <- system.time(fit_train.xgb <- train.xgboost(feat_train, label_train, par_best_xgb))
save(fit_train.xgb, file="../output/fit_train_xgb.RData")
Feed the final trained model with the completely held-out test data. + superResolution.R + Input: a path that points to the folder of low-resolution test images. + Input: a path that points to the folder (empty) of high-resolution test images. + Input: an R object that contains tuned predictors. + Output: construct high-resolution versions for each low-resolution test image.
# NOTE(review): hard-coded absolute path; prefer running from project root.
setwd("C:/43/paraalel/Fall2018-Proj3-Sec1-grp6")
# Parallel super-resolution driver and prediction helper.
source("./lib/Superresolution.Parallel.R")
source("./lib/test.R")
test_dir <- "./data/test_sample/" # This will be modified for different data sets.
test_LR_dir <- paste0(test_dir, "LR/")          # low-resolution test inputs
test_HR_dir_GBM <- paste0(test_dir, "HR/GBM/")  # GBM reconstructions go here
test_HR_dir_XGB <- paste0(test_dir, "HR/XGB/")  # XGBoost reconstructions go here
# Output HR versions using GBM
# Load the persisted GBM fit and time the super-resolution reconstruction
# of every low-resolution test image.
tm_test_gbm=NA
if(run.test){
load(file="./output/fit_train.gbmpar.RData")
tm_test_gbm <- system.time(superResolution.par(LR_dir=test_LR_dir, HR_dir=test_HR_dir_GBM, modelList=fit_train.gbmpar))
}
# Output HR versions using XGBoost
# (Fixed: the timer was initialised as tm_xgb_test but assigned and
# reported as tm_test_xgb; use one consistent name.)
tm_test_xgb <- NA
if(run.test){
# (Fixed: the model was saved as fit_train_xgb.RData above; the original
# tried to load the non-existent fit_train_xgbpar.RData.)
load(file="./output/fit_train_xgb.RData")
tm_test_xgb <- system.time(superResolution.par(LR_dir=test_LR_dir, HR_dir=test_HR_dir_XGB, modelList=fit_train.xgb))
}
Prediction performance matters, but so do the running times for constructing features and for training the model, especially when computation resources are limited.
# Report the recorded timings. NOTE(review): system.time()[1] is user CPU
# time; elapsed wall-clock time is element [3] -- confirm which was intended.
cat("Time for constructing training features=", tm_feature_train[1], "s \n")
## Time for constructing training features= 111.21 s
#cat("Time for constructing testing features=", tm_feature_test[1], "s \n")
cat("Time for training GBM model=", tm_train_gbm[1], "s \n")
## Time for training GBM model= 0.11 s
cat("Time for GBM super-resolution=", tm_test_gbm[1], "s \n")
## Time for GBM super-resolution= 0.36 s
# NOTE(review): tm_test_xgb exists only if run.test was TRUE; the earlier
# initialisation used the name tm_xgb_test (likely a typo).
cat("Time for training XGBoost model=", tm_train_xgb[1], "s \n")
## Time for training XGBoost model= 0.31 s
cat("Time for XGBoost super-resolution=", tm_test_xgb[1], "s \n")
## Time for XGBoost super-resolution= 0.11 s